https://www.tensorflow.org/tfmodels/vision/object_detection

https://public.roboflow.com/object-detection/aquarium

Install necessary dependencies¶

In [ ]:
!pip install -U -q "tf-models-official"
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 2.7/2.7 MB 26.7 MB/s eta 0:00:00
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 106.6/106.6 kB 11.3 MB/s eta 0:00:00
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 43.6/43.6 kB 4.5 MB/s eta 0:00:00
  Preparing metadata (setup.py) ... done
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 242.5/242.5 kB 20.5 MB/s eta 0:00:00
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 5.2/5.2 MB 55.2 MB/s eta 0:00:00
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 589.8/589.8 MB 1.5 MB/s eta 0:00:00
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.7/1.7 MB 58.6 MB/s eta 0:00:00
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 4.8/4.8 MB 34.3 MB/s eta 0:00:00
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 2.2/2.2 MB 56.6 MB/s eta 0:00:00
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 5.5/5.5 MB 67.9 MB/s eta 0:00:00
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.0/1.0 MB 39.0 MB/s eta 0:00:00
  Building wheel for seqeval (setup.py) ... done

Import required libraries¶

In [ ]:
import os
import io
import pprint
import tempfile
import matplotlib
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

from PIL import Image
from six import BytesIO
from IPython import display
from urllib.request import urlopen

Import required libraries from tensorflow models¶

In [ ]:
import orbit
import tensorflow_models as tfm

from official.core import exp_factory
from official.core import config_definitions as cfg
from official.vision.serving import export_saved_model_lib
from official.vision.ops.preprocess_ops import normalize_image
from official.vision.ops.preprocess_ops import resize_and_crop_image
from official.vision.utils.object_detection import visualization_utils
from official.vision.dataloaders.tf_example_decoder import TfExampleDecoder

# Pretty-printer used later to dump the (very large) experiment config.
pp = pprint.PrettyPrinter(indent=4) # Set Pretty Print Indentation
# Sanity check: the output below shows this run used TF 2.16.1.
print(tf.__version__) # Check the version of tensorflow used

# Render matplotlib figures inline in the notebook.
%matplotlib inline
2.16.1

Prepare Data¶

  • Convert the Train, Validation & Test datasets to TFRecord format
In [ ]:
# Mount Google Drive so the dataset zip stored there is reachable under
# /content/gdrive (Colab-only; this prompts for authorization).
from google.colab import drive
drive.mount('/content/gdrive')
Mounted at /content/gdrive
In [ ]:
# Kept for reference: commands from the original tutorial that download and
# extract the BCCD dataset (not used in this run).
#!curl -L "https://public.roboflow.com/ds/ZpYLqHeT0W?key=ZXfZLRnhsc"> "./BCCD.v1-bccd.coco.zip"

#!unzip -q -o './BCCD.v1-bccd.coco.zip' -d './BCC.v1-bccd.coco/'

#!rm './BCCD.v1-bccd.coco.zip'

#/content/Aquarium

# Extract the Aquarium COCO dataset from the mounted Drive into ./Aquarium.
# -q: quiet, -o: overwrite existing files without prompting.
!unzip -q -o '/content/gdrive/MyDrive/Dataset/Aquarium Combined.v2-raw-1024.coco.zip' -d './Aquarium'

CLI command to convert data(train data)¶

In [ ]:
# Training split paths: image directory, COCO-format annotation file, and the
# output prefix for TFRecords (the converter appends a shard suffix, e.g.
# train-00000-of-00001.tfrecord).
TRAIN_DATA_DIR='./Aquarium/train'
TRAIN_ANNOTATION_FILE_DIR='./Aquarium/train/_annotations.coco.json'
OUTPUT_TFRECORD_TRAIN='./aquarium_tfrecords/train'

# Sanity check that the annotation file actually exists before converting.
!echo $TRAIN_ANNOTATION_FILE_DIR
print(os.path.exists(TRAIN_ANNOTATION_FILE_DIR))

# Need to provide
# 1. image_dir: where images are present
# 2. object_annotations_file: where annotations are listed in json format
# 3. output_file_prefix: where to write output converted TFRecords files

# Convert the COCO-annotated training split into a single TFRecord shard.
!python -m official.vision.data.create_coco_tf_record --logtostderr \
  --image_dir=$TRAIN_DATA_DIR \
  --object_annotations_file=$TRAIN_ANNOTATION_FILE_DIR \
  --output_file_prefix=$OUTPUT_TFRECORD_TRAIN \
  --num_shards=1
./Aquarium/train/_annotations.coco.json
True
2024-03-18 03:02:53.094964: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
I0318 03:02:55.631162 140226332578944 create_coco_tf_record.py:502] writing to output path: ./aquarium_tfrecords/train
I0318 03:02:55.653593 140226332578944 create_coco_tf_record.py:374] Building bounding box index.
I0318 03:02:55.654310 140226332578944 create_coco_tf_record.py:385] 1 images are missing bboxes.
I0318 03:02:56.151755 140226332578944 tfrecord_lib.py:168] On image 0
I0318 03:02:56.175168 140226332578944 tfrecord_lib.py:168] On image 100
I0318 03:02:56.190558 140226332578944 tfrecord_lib.py:168] On image 200
I0318 03:02:56.205687 140226332578944 tfrecord_lib.py:168] On image 300
I0318 03:02:56.222146 140226332578944 tfrecord_lib.py:168] On image 400
I0318 03:02:56.245349 140226332578944 tfrecord_lib.py:180] Finished writing, skipped 0 annotations.
I0318 03:02:56.248928 140226332578944 create_coco_tf_record.py:537] Finished writing, skipped 0 annotations.

CLI command to convert data(validation data)¶

In [ ]:
# Validation split paths (same layout as the training split above).
VALID_DATA_DIR='./Aquarium/valid'
VALID_ANNOTATION_FILE_DIR='./Aquarium/valid/_annotations.coco.json'
OUTPUT_TFRECORD_VALID='./aquarium_tfrecords/valid'

# Convert the COCO-annotated validation split into a single TFRecord shard.
!python -m official.vision.data.create_coco_tf_record --logtostderr \
  --image_dir=$VALID_DATA_DIR \
  --object_annotations_file=$VALID_ANNOTATION_FILE_DIR \
  --output_file_prefix=$OUTPUT_TFRECORD_VALID \
  --num_shards=1
2024-03-18 03:03:05.415507: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
I0318 03:03:07.980000 140104538027136 create_coco_tf_record.py:502] writing to output path: ./aquarium_tfrecords/valid
I0318 03:03:07.986214 140104538027136 create_coco_tf_record.py:374] Building bounding box index.
I0318 03:03:07.986508 140104538027136 create_coco_tf_record.py:385] 0 images are missing bboxes.
I0318 03:03:08.145830 140104538027136 tfrecord_lib.py:168] On image 0
I0318 03:03:08.161374 140104538027136 tfrecord_lib.py:168] On image 100
I0318 03:03:08.178261 140104538027136 tfrecord_lib.py:180] Finished writing, skipped 0 annotations.
I0318 03:03:08.179609 140104538027136 create_coco_tf_record.py:537] Finished writing, skipped 0 annotations.

CLI command to convert data(test data)¶

In [ ]:
# Test split paths (same layout as the training split above).
TEST_DATA_DIR='./Aquarium/test'
TEST_ANNOTATION_FILE_DIR='./Aquarium/test/_annotations.coco.json'
OUTPUT_TFRECORD_TEST='./aquarium_tfrecords/test'

# Convert the COCO-annotated test split into a single TFRecord shard.
!python -m official.vision.data.create_coco_tf_record --logtostderr \
  --image_dir=$TEST_DATA_DIR \
  --object_annotations_file=$TEST_ANNOTATION_FILE_DIR \
  --output_file_prefix=$OUTPUT_TFRECORD_TEST \
  --num_shards=1
2024-03-18 03:06:52.560972: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
I0318 03:06:55.150396 138325566469248 create_coco_tf_record.py:502] writing to output path: ./aquarium_tfrecords/test
I0318 03:06:55.154611 138325566469248 create_coco_tf_record.py:374] Building bounding box index.
I0318 03:06:55.154825 138325566469248 create_coco_tf_record.py:385] 0 images are missing bboxes.
I0318 03:06:55.238603 138325566469248 tfrecord_lib.py:168] On image 0
I0318 03:06:55.262249 138325566469248 tfrecord_lib.py:180] Finished writing, skipped 2 annotations.
I0318 03:06:55.263144 138325566469248 create_coco_tf_record.py:537] Finished writing, skipped 2 annotations.

Configure the Retinanet Resnet FPN COCO model for custom dataset.¶

In [ ]:
# Output locations: model checkpoints and the exported SavedModel.
model_dir = './trained_model/'
export_dir = './exported_model/'

# Single-shard TFRecord files produced by the conversion cells above.
train_data_input_path = './aquarium_tfrecords/train-00000-of-00001.tfrecord'
valid_data_input_path = './aquarium_tfrecords/valid-00000-of-00001.tfrecord'
test_data_input_path = './aquarium_tfrecords/test-00000-of-00001.tfrecord'

In Model Garden, the collections of parameters that define a model are called configs. Model Garden can create a config based on a known set of parameters via a factory.

Use the retinanet_resnetfpn_coco experiment configuration, as defined by tfm.vision.configs.retinanet.retinanet_resnetfpn_coco.

The configuration defines an experiment to train a RetinaNet with a ResNet-50 backbone and an FPN decoder. The default configuration is trained on COCO train2017 and evaluated on COCO val2017.

There are also other alternative experiments available such as retinanet_spinenet_coco, fasterrcnn_resnetfpn_coco and more. One can switch to them by changing the experiment name argument to the get_exp_config function.

We are going to fine tune the Resnet-50 backbone checkpoint which is already present in the default configuration.

In [ ]:
# Build the default RetinaNet (ResNet-50 backbone + FPN decoder) COCO
# experiment config; its fields are overridden for the Aquarium dataset
# in the cells below.
exp_config = exp_factory.get_exp_config('retinanet_resnetfpn_coco')
print(exp_config)
ExperimentConfig(task=RetinaNetTask(init_checkpoint='gs://cloud-tpu-checkpoints/vision-2.0/resnet50_imagenet/ckpt-28080', model=RetinaNet(num_classes=91, input_size=[640, 640, 3], min_level=3, max_level=7, anchor=Anchor(num_scales=3, aspect_ratios=[0.5, 1.0, 2.0], anchor_size=4.0), backbone=Backbone(type='resnet', resnet=ResNet(model_id=50, depth_multiplier=1.0, stem_type='v0', se_ratio=0.0, stochastic_depth_drop_rate=0.0, scale_stem=True, resnetd_shortcut=False, replace_stem_max_pool=False, bn_trainable=True), dilated_resnet=DilatedResNet(model_id=50, output_stride=16, multigrid=None, stem_type='v0', last_stage_repeats=1, se_ratio=0.0, stochastic_depth_drop_rate=0.0, resnetd_shortcut=False, replace_stem_max_pool=False), revnet=RevNet(model_id=56), efficientnet=EfficientNet(model_id='b0', se_ratio=0.0, stochastic_depth_drop_rate=0.0), spinenet=SpineNet(model_id='49', stochastic_depth_drop_rate=0.0, min_level=3, max_level=7), spinenet_mobile=SpineNetMobile(model_id='49', stochastic_depth_drop_rate=0.0, se_ratio=0.2, expand_ratio=6, min_level=3, max_level=7, use_keras_upsampling_2d=False), mobilenet=MobileNet(model_id='MobileNetV2', filter_size_scale=1.0, stochastic_depth_drop_rate=0.0, output_stride=None, output_intermediate_endpoints=False), mobiledet=MobileDet(model_id='MobileDetCPU', filter_size_scale=1.0), vit=VisionTransformer(model_name='vit-b16', pooler='token', representation_size=0, hidden_size=1, patch_size=16, transformer=Transformer(mlp_dim=1, num_heads=1, num_layers=1, attention_dropout_rate=0.0, dropout_rate=0.0), init_stochastic_depth_rate=0.0, original_init=True, pos_embed_shape=None, output_encoded_tokens=True, output_2d_feature_maps=False, layer_scale_init_value=0.0, transformer_partition_dims=None)), decoder=Decoder(type='fpn', fpn=FPN(num_filters=256, fusion_type='sum', use_separable_conv=False, use_keras_layer=False), nasfpn=NASFPN(num_filters=256, num_repeats=5, use_separable_conv=False), identity=Identity(), aspp=ASPP(level=4, 
dilation_rates=[], dropout_rate=0.0, num_filters=256, use_depthwise_convolution=False, pool_kernel_size=None, spp_layer_version='v1', output_tensor=False)), head=RetinaNetHead(num_convs=4, num_filters=256, use_separable_conv=False, attribute_heads=[], share_classification_heads=False, share_level_convs=True), detection_generator=DetectionGenerator(apply_nms=True, pre_nms_top_k=5000, pre_nms_score_threshold=0.05, nms_iou_threshold=0.5, max_num_detections=100, nms_version='v2', use_cpu_nms=False, soft_nms_sigma=None, tflite_post_processing=TFLitePostProcessingConfig(max_detections=200, max_classes_per_detection=5, use_regular_nms=False, nms_score_threshold=0.1, nms_iou_threshold=0.5, normalize_anchor_coordinates=False, omit_nms=False, detections_per_class=5, y_scale=1.0, x_scale=1.0, w_scale=1.0, h_scale=1.0), return_decoded=None, use_class_agnostic_nms=False, box_coder_weights=None), norm_activation=NormActivation(activation='relu', use_sync_bn=False, norm_momentum=0.99, norm_epsilon=0.001)), train_data=DataConfig(input_path='coco/train*', tfds_name='', tfds_split='', global_batch_size=256, is_training=True, drop_remainder=True, shuffle_buffer_size=10000, cache=False, cycle_length=None, block_length=1, ram_budget=None, deterministic=None, sharding=True, enable_tf_data_service=False, tf_data_service_address=None, tf_data_service_job_name=None, tfds_data_dir='', tfds_as_supervised=False, tfds_skip_decoding_feature='', enable_shared_tf_data_service_between_parallel_trainers=False, apply_tf_data_service_before_batching=False, trainer_id=None, seed=None, prefetch_buffer_size=None, autotune_algorithm=None, weights=None, dtype='bfloat16', decoder=DataDecoder(type='simple_decoder', simple_decoder=TfExampleDecoder(regenerate_source_id=False, mask_binarize_threshold=None, attribute_names=[]), label_map_decoder=TfExampleDecoderLabelMap(regenerate_source_id=False, mask_binarize_threshold=None, label_map='')), parser=Parser(num_channels=3, match_threshold=0.5, 
unmatched_threshold=0.5, aug_rand_hflip=True, aug_scale_min=0.8, aug_scale_max=1.2, skip_crowd_during_training=True, max_num_instances=100, aug_type=None, pad=True, keep_aspect_ratio=True, aug_policy=None), file_type='tfrecord'), validation_data=DataConfig(input_path='coco/val*', tfds_name='', tfds_split='', global_batch_size=8, is_training=False, drop_remainder=True, shuffle_buffer_size=10000, cache=False, cycle_length=None, block_length=1, ram_budget=None, deterministic=None, sharding=True, enable_tf_data_service=False, tf_data_service_address=None, tf_data_service_job_name=None, tfds_data_dir='', tfds_as_supervised=False, tfds_skip_decoding_feature='', enable_shared_tf_data_service_between_parallel_trainers=False, apply_tf_data_service_before_batching=False, trainer_id=None, seed=None, prefetch_buffer_size=None, autotune_algorithm=None, weights=None, dtype='bfloat16', decoder=DataDecoder(type='simple_decoder', simple_decoder=TfExampleDecoder(regenerate_source_id=False, mask_binarize_threshold=None, attribute_names=[]), label_map_decoder=TfExampleDecoderLabelMap(regenerate_source_id=False, mask_binarize_threshold=None, label_map='')), parser=Parser(num_channels=3, match_threshold=0.5, unmatched_threshold=0.5, aug_rand_hflip=False, aug_scale_min=1.0, aug_scale_max=1.0, skip_crowd_during_training=True, max_num_instances=100, aug_type=None, pad=True, keep_aspect_ratio=True, aug_policy=None), file_type='tfrecord'), name=None, differential_privacy_config=None, allow_image_summary=False, losses=Losses(loss_weight=1.0, focal_loss_alpha=0.25, focal_loss_gamma=1.5, huber_loss_delta=0.1, box_loss_weight=50, l2_weight_decay=0.0001), init_checkpoint_modules='backbone', annotation_file='coco/instances_val2017.json', per_category_metrics=False, export_config=ExportConfig(output_normalized_coordinates=False, cast_num_detections_to_float=False, cast_detection_classes_to_float=False, output_intermediate_features=False), use_coco_metrics=True, use_wod_metrics=False, 
freeze_backbone=False, max_num_eval_detections=100), trainer=TrainerConfig(optimizer_config=OptimizationConfig(optimizer=OptimizerConfig(type='sgd', sgd=SGDConfig(clipnorm=None, clipvalue=None, global_clipnorm=None, name='SGD', decay=0.0, nesterov=False, momentum=0.9), sgd_experimental=SGDExperimentalConfig(clipnorm=None, clipvalue=None, global_clipnorm=None, name='SGD', nesterov=False, momentum=0.0, jit_compile=False), adam=AdamConfig(clipnorm=None, clipvalue=None, global_clipnorm=None, name='Adam', beta_1=0.9, beta_2=0.999, epsilon=1e-07, amsgrad=False), adam_experimental=AdamExperimentalConfig(clipnorm=None, clipvalue=None, global_clipnorm=None, name='Adam', beta_1=0.9, beta_2=0.999, epsilon=1e-07, amsgrad=False, jit_compile=False), adamw=AdamWeightDecayConfig(clipnorm=None, clipvalue=None, global_clipnorm=None, name='AdamWeightDecay', beta_1=0.9, beta_2=0.999, epsilon=1e-07, amsgrad=False, weight_decay_rate=0.0, include_in_weight_decay=None, exclude_from_weight_decay=None, gradient_clip_norm=1.0), adamw_experimental=AdamWeightDecayExperimentalConfig(clipnorm=None, clipvalue=None, global_clipnorm=1.0, name='AdamWeightDecayExperimental', beta_1=0.9, beta_2=0.999, epsilon=1e-07, amsgrad=False, weight_decay=0.0, jit_compile=False), lamb=LAMBConfig(clipnorm=None, clipvalue=None, global_clipnorm=None, name='LAMB', beta_1=0.9, beta_2=0.999, epsilon=1e-06, weight_decay_rate=0.0, exclude_from_weight_decay=None, exclude_from_layer_adaptation=None), rmsprop=RMSPropConfig(clipnorm=None, clipvalue=None, global_clipnorm=None, name='RMSprop', rho=0.9, momentum=0.0, epsilon=1e-07, centered=False), lars=LARSConfig(clipnorm=None, clipvalue=None, global_clipnorm=None, name='LARS', momentum=0.9, eeta=0.001, weight_decay_rate=0.0, nesterov=False, classic_momentum=True, exclude_from_weight_decay=None, exclude_from_layer_adaptation=None), adagrad=AdagradConfig(clipnorm=None, clipvalue=None, global_clipnorm=None, name='Adagrad', initial_accumulator_value=0.1, epsilon=1e-07), 
slide=SLIDEConfig(clipnorm=None, clipvalue=None, global_clipnorm=None, name='SLIDE', beta_1=0.9, beta_2=0.999, epsilon=1e-06, weight_decay_rate=0.0, weight_decay_type='inner', exclude_from_weight_decay=None, exclude_from_layer_adaptation=None, include_in_sparse_layer_adaptation=None, sparse_layer_learning_rate=0.1, do_gradient_rescaling=True, norm_type='layer', ratio_clip_norm=100000.0), adafactor=AdafactorConfig(clipnorm=None, clipvalue=None, global_clipnorm=None, name='Adafactor', factored=True, multiply_by_parameter_scale=True, beta1=None, decay_rate=0.8, step_offset=0, clipping_threshold=1.0, min_dim_size_to_factor=128, epsilon1=1e-30, epsilon2=0.001, weight_decay=None, include_in_weight_decay=None), adafactor_keras=AdafactorKerasConfig(clipnorm=None, clipvalue=None, global_clipnorm=None, name='Adafactor', learning_rate=0.001, beta_2_decay=-0.8, epsilon_1=1e-30, epsilon_2=0.001, clip_threshold=1.0, relative_step=True)), ema=None, learning_rate=LrConfig(type='stepwise', constant=ConstantLrConfig(name='Constant', learning_rate=0.1), stepwise=StepwiseLrConfig(name='PiecewiseConstantDecay', boundaries=[26334, 30954], values=[0.32, 0.032, 0.0032], offset=0), exponential=ExponentialLrConfig(name='ExponentialDecay', initial_learning_rate=None, decay_steps=None, decay_rate=None, staircase=None, offset=0), polynomial=PolynomialLrConfig(name='PolynomialDecay', initial_learning_rate=None, decay_steps=None, end_learning_rate=0.0001, power=1.0, cycle=False, offset=0), cosine=CosineLrConfig(name='CosineDecay', initial_learning_rate=None, decay_steps=None, alpha=0.0, offset=0), power=DirectPowerLrConfig(name='DirectPowerDecay', initial_learning_rate=None, power=-0.5), power_linear=PowerAndLinearDecayLrConfig(name='PowerAndLinearDecay', initial_learning_rate=None, total_decay_steps=None, power=-0.5, linear_decay_fraction=0.1, offset=0), power_with_offset=PowerDecayWithOffsetLrConfig(name='PowerDecayWithOffset', initial_learning_rate=None, power=-0.5, offset=0, 
pre_offset_learning_rate=1000000.0), step_cosine_with_offset=StepCosineLrConfig(name='StepCosineDecayWithOffset', boundaries=None, values=None, offset=0)), warmup=WarmupConfig(type='linear', linear=LinearWarmupConfig(name='linear', warmup_learning_rate=0.0067, warmup_steps=500), polynomial=PolynomialWarmupConfig(name='polynomial', power=1, warmup_steps=None))), train_tf_while_loop=True, train_tf_function=True, eval_tf_function=True, eval_tf_while_loop=False, allow_tpu_summary=False, steps_per_loop=462, summary_interval=462, checkpoint_interval=462, max_to_keep=5, continuous_eval_timeout=3600, train_steps=33264, validation_steps=625, validation_interval=462, best_checkpoint_export_subdir='', best_checkpoint_eval_metric='', best_checkpoint_metric_comp='higher', loss_upper_bound=1000000.0, recovery_begin_steps=0, recovery_max_trials=0, validation_summary_subdir='validation', preemption_on_demand_checkpoint=True), runtime=RuntimeConfig(distribution_strategy='mirrored', enable_xla=False, gpu_thread_mode=None, dataset_num_private_threads=None, per_gpu_thread_count=0, tpu=None, num_gpus=0, worker_hosts=None, task_index=-1, all_reduce_alg=None, num_packs=1, mixed_precision_dtype='bfloat16', loss_scale=None, run_eagerly=False, batchnorm_spatial_persistent=False, tpu_enable_xla_dynamic_padder=None, num_cores_per_replica=1, default_shard_dim=-1, use_tpu_mp_strategy=False))

Adjust the model and dataset configurations so that they work with the custom dataset (in this case Aquarium).¶

In [ ]:
num_classes = 3
batch_size = 8

HEIGHT = 256
WIDTH = 256
IMG_SIZE = [HEIGHT, WIDTH, 3]

task_cfg = exp_config.task

# Task-level settings: keep the backbone trainable; no COCO annotation file
# is used for evaluation here.
task_cfg.freeze_backbone = False
task_cfg.annotation_file = ''

# Model settings: smaller input resolution, and num_classes + 1 to reserve
# an index for the background class.
model_cfg = task_cfg.model
model_cfg.input_size = IMG_SIZE
model_cfg.num_classes = num_classes + 1
model_cfg.detection_generator.tflite_post_processing.max_classes_per_detection = model_cfg.num_classes

# Training input pipeline: point at our TFRecords and disable scale jitter.
train_cfg = task_cfg.train_data
train_cfg.input_path = train_data_input_path
train_cfg.dtype = 'float32'
train_cfg.global_batch_size = batch_size
train_cfg.parser.aug_scale_min = 1.0
train_cfg.parser.aug_scale_max = 1.0

# Validation input pipeline.
valid_cfg = task_cfg.validation_data
valid_cfg.input_path = valid_data_input_path
valid_cfg.dtype = 'float32'
valid_cfg.global_batch_size = batch_size

Adjust the trainer configuration.¶

In [ ]:
# Detect which accelerator (if any) is visible to this kernel.
logical_device_names = [d.name for d in tf.config.list_logical_devices()]
joined_names = ''.join(logical_device_names)

if 'GPU' in joined_names:
  print('This may be broken in Colab.')
  device = 'GPU'
elif 'TPU' in joined_names:
  print('This may be broken in Colab.')
  device = 'TPU'
else:
  print('Running on CPU is slow, so only train for a few steps.')
  device = 'CPU'


train_steps = 1000

# Run summaries, checkpoints and validation every 100 steps of the
# 1000-step fine-tuning run.
trainer_cfg = exp_config.trainer
trainer_cfg.steps_per_loop = 100 # steps_per_loop = num_of_training_examples // train_batch_size
trainer_cfg.summary_interval = 100
trainer_cfg.checkpoint_interval = 100
trainer_cfg.validation_interval = 100
trainer_cfg.validation_steps =  100 # validation_steps = num_of_validation_examples // eval_batch_size
trainer_cfg.train_steps = train_steps

# Cosine-decay learning-rate schedule with a short linear warmup.
opt_cfg = trainer_cfg.optimizer_config
opt_cfg.learning_rate.type = 'cosine'
opt_cfg.learning_rate.cosine.decay_steps = train_steps
opt_cfg.learning_rate.cosine.initial_learning_rate = 0.1
opt_cfg.warmup.linear.warmup_steps = 100
opt_cfg.warmup.linear.warmup_learning_rate = 0.05
This may be broken in Colab.

Print the modified configuration.¶

In [ ]:
# Dump the fully-resolved experiment config as a nested dict for review,
# then enlarge the Colab output iframe so the dump is easier to scroll.
pprint.pprint(exp_config.as_dict(), indent=4)
display.Javascript('google.colab.output.setIframeHeight("500px");')
{   'runtime': {   'all_reduce_alg': None,
                   'batchnorm_spatial_persistent': False,
                   'dataset_num_private_threads': None,
                   'default_shard_dim': -1,
                   'distribution_strategy': 'mirrored',
                   'enable_xla': False,
                   'gpu_thread_mode': None,
                   'loss_scale': None,
                   'mixed_precision_dtype': 'bfloat16',
                   'num_cores_per_replica': 1,
                   'num_gpus': 0,
                   'num_packs': 1,
                   'per_gpu_thread_count': 0,
                   'run_eagerly': False,
                   'task_index': -1,
                   'tpu': None,
                   'tpu_enable_xla_dynamic_padder': None,
                   'use_tpu_mp_strategy': False,
                   'worker_hosts': None},
    'task': {   'allow_image_summary': False,
                'annotation_file': '',
                'differential_privacy_config': None,
                'export_config': {   'cast_detection_classes_to_float': False,
                                     'cast_num_detections_to_float': False,
                                     'output_intermediate_features': False,
                                     'output_normalized_coordinates': False},
                'freeze_backbone': False,
                'init_checkpoint': 'gs://cloud-tpu-checkpoints/vision-2.0/resnet50_imagenet/ckpt-28080',
                'init_checkpoint_modules': 'backbone',
                'losses': {   'box_loss_weight': 50,
                              'focal_loss_alpha': 0.25,
                              'focal_loss_gamma': 1.5,
                              'huber_loss_delta': 0.1,
                              'l2_weight_decay': 0.0001,
                              'loss_weight': 1.0},
                'max_num_eval_detections': 100,
                'model': {   'anchor': {   'anchor_size': 4.0,
                                           'aspect_ratios': [0.5, 1.0, 2.0],
                                           'num_scales': 3},
                             'backbone': {   'resnet': {   'bn_trainable': True,
                                                           'depth_multiplier': 1.0,
                                                           'model_id': 50,
                                                           'replace_stem_max_pool': False,
                                                           'resnetd_shortcut': False,
                                                           'scale_stem': True,
                                                           'se_ratio': 0.0,
                                                           'stem_type': 'v0',
                                                           'stochastic_depth_drop_rate': 0.0},
                                             'type': 'resnet'},
                             'decoder': {   'fpn': {   'fusion_type': 'sum',
                                                       'num_filters': 256,
                                                       'use_keras_layer': False,
                                                       'use_separable_conv': False},
                                            'type': 'fpn'},
                             'detection_generator': {   'apply_nms': True,
                                                        'box_coder_weights': None,
                                                        'max_num_detections': 100,
                                                        'nms_iou_threshold': 0.5,
                                                        'nms_version': 'v2',
                                                        'pre_nms_score_threshold': 0.05,
                                                        'pre_nms_top_k': 5000,
                                                        'return_decoded': None,
                                                        'soft_nms_sigma': None,
                                                        'tflite_post_processing': {   'detections_per_class': 5,
                                                                                      'h_scale': 1.0,
                                                                                      'max_classes_per_detection': 4,
                                                                                      'max_detections': 200,
                                                                                      'nms_iou_threshold': 0.5,
                                                                                      'nms_score_threshold': 0.1,
                                                                                      'normalize_anchor_coordinates': False,
                                                                                      'omit_nms': False,
                                                                                      'use_regular_nms': False,
                                                                                      'w_scale': 1.0,
                                                                                      'x_scale': 1.0,
                                                                                      'y_scale': 1.0},
                                                        'use_class_agnostic_nms': False,
                                                        'use_cpu_nms': False},
                             'head': {   'attribute_heads': [],
                                         'num_convs': 4,
                                         'num_filters': 256,
                                         'share_classification_heads': False,
                                         'share_level_convs': True,
                                         'use_separable_conv': False},
                             'input_size': [256, 256, 3],
                             'max_level': 7,
                             'min_level': 3,
                             'norm_activation': {   'activation': 'relu',
                                                    'norm_epsilon': 0.001,
                                                    'norm_momentum': 0.99,
                                                    'use_sync_bn': False},
                             'num_classes': 4},
                'name': None,
                'per_category_metrics': False,
                'train_data': {   'apply_tf_data_service_before_batching': False,
                                  'autotune_algorithm': None,
                                  'block_length': 1,
                                  'cache': False,
                                  'cycle_length': None,
                                  'decoder': {   'simple_decoder': {   'attribute_names': [   ],
                                                                       'mask_binarize_threshold': None,
                                                                       'regenerate_source_id': False},
                                                 'type': 'simple_decoder'},
                                  'deterministic': None,
                                  'drop_remainder': True,
                                  'dtype': 'float32',
                                  'enable_shared_tf_data_service_between_parallel_trainers': False,
                                  'enable_tf_data_service': False,
                                  'file_type': 'tfrecord',
                                  'global_batch_size': 8,
                                  'input_path': './aquarium_tfrecords/train-00000-of-00001.tfrecord',
                                  'is_training': True,
                                  'parser': {   'aug_policy': None,
                                                'aug_rand_hflip': True,
                                                'aug_scale_max': 1.0,
                                                'aug_scale_min': 1.0,
                                                'aug_type': None,
                                                'keep_aspect_ratio': True,
                                                'match_threshold': 0.5,
                                                'max_num_instances': 100,
                                                'num_channels': 3,
                                                'pad': True,
                                                'skip_crowd_during_training': True,
                                                'unmatched_threshold': 0.5},
                                  'prefetch_buffer_size': None,
                                  'ram_budget': None,
                                  'seed': None,
                                  'sharding': True,
                                  'shuffle_buffer_size': 10000,
                                  'tf_data_service_address': None,
                                  'tf_data_service_job_name': None,
                                  'tfds_as_supervised': False,
                                  'tfds_data_dir': '',
                                  'tfds_name': '',
                                  'tfds_skip_decoding_feature': '',
                                  'tfds_split': '',
                                  'trainer_id': None,
                                  'weights': None},
                'use_coco_metrics': True,
                'use_wod_metrics': False,
                'validation_data': {   'apply_tf_data_service_before_batching': False,
                                       'autotune_algorithm': None,
                                       'block_length': 1,
                                       'cache': False,
                                       'cycle_length': None,
                                       'decoder': {   'simple_decoder': {   'attribute_names': [   ],
                                                                            'mask_binarize_threshold': None,
                                                                            'regenerate_source_id': False},
                                                      'type': 'simple_decoder'},
                                       'deterministic': None,
                                       'drop_remainder': True,
                                       'dtype': 'float32',
                                       'enable_shared_tf_data_service_between_parallel_trainers': False,
                                       'enable_tf_data_service': False,
                                       'file_type': 'tfrecord',
                                       'global_batch_size': 8,
                                       'input_path': './aquarium_tfrecords/valid-00000-of-00001.tfrecord',
                                       'is_training': False,
                                       'parser': {   'aug_policy': None,
                                                     'aug_rand_hflip': False,
                                                     'aug_scale_max': 1.0,
                                                     'aug_scale_min': 1.0,
                                                     'aug_type': None,
                                                     'keep_aspect_ratio': True,
                                                     'match_threshold': 0.5,
                                                     'max_num_instances': 100,
                                                     'num_channels': 3,
                                                     'pad': True,
                                                     'skip_crowd_during_training': True,
                                                     'unmatched_threshold': 0.5},
                                       'prefetch_buffer_size': None,
                                       'ram_budget': None,
                                       'seed': None,
                                       'sharding': True,
                                       'shuffle_buffer_size': 10000,
                                       'tf_data_service_address': None,
                                       'tf_data_service_job_name': None,
                                       'tfds_as_supervised': False,
                                       'tfds_data_dir': '',
                                       'tfds_name': '',
                                       'tfds_skip_decoding_feature': '',
                                       'tfds_split': '',
                                       'trainer_id': None,
                                       'weights': None}},
    'trainer': {   'allow_tpu_summary': False,
                   'best_checkpoint_eval_metric': '',
                   'best_checkpoint_export_subdir': '',
                   'best_checkpoint_metric_comp': 'higher',
                   'checkpoint_interval': 100,
                   'continuous_eval_timeout': 3600,
                   'eval_tf_function': True,
                   'eval_tf_while_loop': False,
                   'loss_upper_bound': 1000000.0,
                   'max_to_keep': 5,
                   'optimizer_config': {   'ema': None,
                                           'learning_rate': {   'cosine': {   'alpha': 0.0,
                                                                              'decay_steps': 1000,
                                                                              'initial_learning_rate': 0.1,
                                                                              'name': 'CosineDecay',
                                                                              'offset': 0},
                                                                'type': 'cosine'},
                                           'optimizer': {   'sgd': {   'clipnorm': None,
                                                                       'clipvalue': None,
                                                                       'decay': 0.0,
                                                                       'global_clipnorm': None,
                                                                       'momentum': 0.9,
                                                                       'name': 'SGD',
                                                                       'nesterov': False},
                                                            'type': 'sgd'},
                                           'warmup': {   'linear': {   'name': 'linear',
                                                                       'warmup_learning_rate': 0.05,
                                                                       'warmup_steps': 100},
                                                         'type': 'linear'}},
                   'preemption_on_demand_checkpoint': True,
                   'recovery_begin_steps': 0,
                   'recovery_max_trials': 0,
                   'steps_per_loop': 100,
                   'summary_interval': 100,
                   'train_steps': 1000,
                   'train_tf_function': True,
                   'train_tf_while_loop': True,
                   'validation_interval': 100,
                   'validation_steps': 100,
                   'validation_summary_subdir': 'validation'}}
Out[ ]:

Set up the distribution strategy.¶

In [ ]:
# Enable mixed precision globally when the experiment config requests float16.
if exp_config.runtime.mixed_precision_dtype == tf.float16:
    tf.keras.mixed_precision.set_global_policy('mixed_float16')

# Pick a distribution strategy from the available logical devices
# (logical_device_names is defined in an earlier cell — TODO confirm).
if 'GPU' in ''.join(logical_device_names):
  # Synchronous data parallelism across all visible GPUs.
  distribution_strategy = tf.distribute.MirroredStrategy()
elif 'TPU' in ''.join(logical_device_names):
  # NOTE(review): tf.distribute.experimental.TPUStrategy is deprecated in
  # favor of tf.distribute.TPUStrategy; also, TF docs usually build the
  # resolver before calling initialize_tpu_system — verify this ordering.
  tf.tpu.experimental.initialize_tpu_system()
  tpu = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='/device:TPU_SYSTEM:0')
  distribution_strategy = tf.distribute.experimental.TPUStrategy(tpu)
else:
  # CPU fallback: everything runs on the single default device.
  print('Warning: this will be really slow.')
  distribution_strategy = tf.distribute.OneDeviceStrategy(logical_device_names[0])

print('Done')
Done

Create the Task object (tfm.core.base_task.Task) from the config_definitions.TaskConfig.¶

In [ ]:
# Build the Task (model + losses + input pipelines) under the strategy scope
# so its variables are created on the distributed devices. Checkpoints and
# summaries are written to model_dir (defined in an earlier cell).
with distribution_strategy.scope():
  task = tfm.core.task_factory.get_task(exp_config.task, logging_dir=model_dir)

Visualize a batch of the data.¶

In [ ]:
# Pull a single batch from the training input pipeline and report the image
# tensor's shape/dtype plus the keys of the label dictionary.
train_batch = task.build_inputs(exp_config.task.train_data).take(1)
for images, labels in train_batch:
  print()
  print(f'images.shape: {str(images.shape):16}  images.dtype: {images.dtype!r}')
  print(f'labels.keys: {labels.keys()}')
images.shape: (8, 256, 256, 3)  images.dtype: tf.float32
labels.keys: dict_keys(['cls_targets', 'box_targets', 'anchor_boxes', 'cls_weights', 'box_weights', 'image_info'])

Create category index dictionary to map the labels to corresponding label names.¶

In [ ]:
# Map each numeric class id of the aquarium dataset to the dict format
# ({'id': ..., 'name': ...}) expected by the visualization utilities.
_class_names = [
    'creatures', 'fish', 'jellyfish', 'penguin',
    'puffin', 'shark', 'starfish', 'stingray',
]
category_index = {
    class_id: {'id': class_id, 'name': class_name}
    for class_id, class_name in enumerate(_class_names)
}
# Decoder that parses serialized tf.Example records into tensors
# (image, groundtruth_boxes, groundtruth_classes, ...).
tf_ex_decoder = TfExampleDecoder()

Helper function for visualizing the results from TFRecords.¶

Use visualize_boxes_and_labels_on_image_array from visualization_utils to draw bounding boxes on the image.

In [ ]:
def show_batch(raw_records, num_of_examples):
  """Plots ground-truth boxes for a batch of serialized tf.Example records.

  Args:
    raw_records: iterable of serialized tf.Example protos (e.g. a
      tf.data.TFRecordDataset), one image per record.
    num_of_examples: number of records in `raw_records`; used to size the
      subplot row so every image gets its own panel.
  """
  plt.figure(figsize=(20, 20))
  use_normalized_coordinates = True
  # Ground truth is drawn with score 1.0 (see `scores` below), so this
  # threshold never filters anything here.
  min_score_thresh = 0.30
  for i, serialized_example in enumerate(raw_records):
    # Fix: the subplot column count was hard-coded to 3, silently ignoring
    # num_of_examples and breaking the layout for any other batch size.
    plt.subplot(1, num_of_examples, i + 1)
    decoded_tensors = tf_ex_decoder.decode(serialized_example)
    image = decoded_tensors['image'].numpy().astype('uint8')
    # One score of 1.0 per ground-truth box, so each renders as "100%".
    scores = np.ones(shape=(len(decoded_tensors['groundtruth_boxes'])))
    visualization_utils.visualize_boxes_and_labels_on_image_array(
        image,
        decoded_tensors['groundtruth_boxes'].numpy(),
        decoded_tensors['groundtruth_classes'].numpy().astype('int'),
        scores,
        category_index=category_index,
        use_normalized_coordinates=use_normalized_coordinates,
        max_boxes_to_draw=200,
        min_score_thresh=min_score_thresh,
        agnostic_mode=False,
        instance_masks=None,
        line_thickness=4)

    plt.imshow(image)
    plt.axis('off')
    plt.title(f'Image-{i+1}')
  plt.show()

Visualization of train data¶

The bounding box detection has two components

Class label of the object detected (e.g. shark)
Percentage of match between predicted and ground truth bounding boxes.

Note: The reason everything shows 100% is that we are visualising the ground truth.

In [ ]:
buffer_size = 20      # Shuffle buffer size; small, we only need a few samples.
num_of_examples = 3   # Number of training records to draw.

# Read raw (still-serialized) records straight from the training TFRecord
# file, shuffle lightly, and keep just num_of_examples for plotting.
raw_records = tf.data.TFRecordDataset(
    exp_config.task.train_data.input_path).shuffle(
        buffer_size=buffer_size).take(num_of_examples)
show_batch(raw_records, num_of_examples)

show_batch(raw_records, num_of_examples)
No description has been provided for this image
No description has been provided for this image

Train and evaluate.¶

We follow the COCO challenge tradition to evaluate the accuracy of object detection based on mAP (mean Average Precision). Please check here for a detailed explanation of how the evaluation metrics for the detection task are computed.

IoU: is defined as the area of the intersection divided by the area of the union of a predicted bounding box and ground truth bounding box.

It may take a while to run 100 epochs.

In [ ]:
# Run the full train-and-eval loop defined by exp_config (1000 train steps,
# eval every 100 steps per the trainer config above). Returns the trained
# keras model and, because run_post_eval=True, a dict of final eval metrics.
model, eval_logs = tfm.core.train_lib.run_experiment(
    distribution_strategy=distribution_strategy,
    task=task,
    mode='train_and_eval',
    params=exp_config,
    model_dir=model_dir,
    run_post_eval=True)
restoring or initializing model...
train | step:      0 | training until step 100...
train | step:    100 | steps/sec:    1.2 | output: 
    {'box_loss': 0.02299292,
     'cls_loss': 0.70729965,
     'learning_rate': 0.09755283,
     'model_loss': 1.8569458,
     'total_loss': 2.5598838,
     'training_loss': 2.5598838}
saved checkpoint to ./trained_model/ckpt-100.
 eval | step:    100 | running 100 steps of evaluation...
creating index...
index created!
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=2.12s).
Accumulating evaluation results...
DONE (t=0.23s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.001
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.002
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.005
 eval | step:    100 | steps/sec:    3.9 | eval time:   25.4 sec | output: 
    {'AP': 3.6709005e-06,
     'AP50': 1.8612207e-05,
     'AP75': 0.0,
     'APl': 8.301182e-06,
     'APm': 0.0,
     'APs': 0.0,
     'ARl': 0.0050045494,
     'ARm': 0.0,
     'ARmax1': 0.0,
     'ARmax10': 0.00057008717,
     'ARmax100': 0.0018443997,
     'ARs': 0.0,
     'box_loss': 0.2946926,
     'cls_loss': 118900470.0,
     'model_loss': 118900490.0,
     'steps_per_second': 3.9322963523760186,
     'total_loss': 118900490.0,
     'validation_loss': 118900490.0}
train | step:    100 | training until step 200...
train | step:    200 | steps/sec:    1.8 | output: 
    {'box_loss': 0.01479636,
     'cls_loss': 0.6349316,
     'learning_rate': 0.090450846,
     'model_loss': 1.3747497,
     'total_loss': 2.2306504,
     'training_loss': 2.2306504}
saved checkpoint to ./trained_model/ckpt-200.
 eval | step:    200 | running 100 steps of evaluation...
creating index...
index created!
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=1.08s).
Accumulating evaluation results...
DONE (t=0.13s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.002
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.005
 eval | step:    200 | steps/sec:   14.2 | eval time:    7.0 sec | output: 
    {'AP': 3.8083363e-06,
     'AP50': 2.0784348e-05,
     'AP75': 0.0,
     'APl': 7.336751e-06,
     'APm': 2.4355377e-05,
     'APs': 0.0,
     'ARl': 0.0046405825,
     'ARm': 0.00017636684,
     'ARmax1': 0.0,
     'ARmax10': 0.0003353454,
     'ARmax100': 0.0018108652,
     'ARs': 0.0,
     'box_loss': 0.013814812,
     'cls_loss': 31640.219,
     'model_loss': 31640.912,
     'steps_per_second': 14.185334637856629,
     'total_loss': 31641.795,
     'validation_loss': 31641.795}
train | step:    200 | training until step 300...
train | step:    300 | steps/sec:    2.7 | output: 
    {'box_loss': 0.013815149,
     'cls_loss': 0.5799069,
     'learning_rate': 0.07938927,
     'model_loss': 1.2706643,
     'total_loss': 2.1497295,
     'training_loss': 2.1497295}
saved checkpoint to ./trained_model/ckpt-300.
 eval | step:    300 | running 100 steps of evaluation...
creating index...
index created!
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=1.01s).
Accumulating evaluation results...
DONE (t=0.15s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.001
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.002
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.005
 eval | step:    300 | steps/sec:   11.9 | eval time:    8.4 sec | output: 
    {'AP': 3.1728236e-05,
     'AP50': 0.00015873539,
     'AP75': 0.0,
     'APl': 0.00014956687,
     'APm': 2.7733865e-06,
     'APs': 0.0,
     'ARl': 0.0053760996,
     'ARm': 5.8788948e-05,
     'ARmax1': 0.0001676727,
     'ARmax10': 0.0010151524,
     'ARmax100': 0.0018535159,
     'ARs': 0.0,
     'box_loss': 0.01368435,
     'cls_loss': 14.919177,
     'model_loss': 15.603395,
     'steps_per_second': 11.949066593127592,
     'total_loss': 16.47536,
     'validation_loss': 16.47536}
train | step:    300 | training until step 400...
train | step:    400 | steps/sec:    2.6 | output: 
    {'box_loss': 0.0139518855,
     'cls_loss': 0.57121444,
     'learning_rate': 0.06545085,
     'model_loss': 1.2688088,
     'total_loss': 2.134406,
     'training_loss': 2.134406}
saved checkpoint to ./trained_model/ckpt-400.
 eval | step:    400 | running 100 steps of evaluation...
creating index...
index created!
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=1.11s).
Accumulating evaluation results...
DONE (t=0.13s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.001
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.001
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.003
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.001
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.006
 eval | step:    400 | steps/sec:   11.4 | eval time:    8.8 sec | output: 
    {'AP': 2.7894635e-05,
     'AP50': 0.00012279442,
     'AP75': 1.4208208e-06,
     'APl': 5.58696e-05,
     'APm': 0.0008486563,
     'APs': 0.0,
     'ARl': 0.0064604185,
     'ARm': 0.00052910054,
     'ARmax1': 3.353454e-05,
     'ARmax10': 0.0006036217,
     'ARmax100': 0.0026827632,
     'ARs': 0.0,
     'box_loss': 0.01361202,
     'cls_loss': 0.5935602,
     'model_loss': 1.2741612,
     'steps_per_second': 11.374738157942874,
     'total_loss': 2.133651,
     'validation_loss': 2.133651}
train | step:    400 | training until step 500...
train | step:    500 | steps/sec:    2.6 | output: 
    {'box_loss': 0.014360726,
     'cls_loss': 0.5609584,
     'learning_rate': 0.049999997,
     'model_loss': 1.2789946,
     'total_loss': 2.1333876,
     'training_loss': 2.1333876}
saved checkpoint to ./trained_model/ckpt-500.
 eval | step:    500 | running 100 steps of evaluation...
creating index...
index created!
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=1.86s).
Accumulating evaluation results...
DONE (t=0.25s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.001
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.002
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.005
 eval | step:    500 | steps/sec:    9.2 | eval time:   10.8 sec | output: 
    {'AP': 1.2444041e-05,
     'AP50': 8.6922904e-05,
     'AP75': 5.4380126e-07,
     'APl': 3.2068932e-05,
     'APm': 5.635168e-07,
     'APs': 0.0,
     'ARl': 0.004913558,
     'ARm': 5.8788948e-05,
     'ARmax1': 0.0,
     'ARmax10': 0.00063715625,
     'ARmax100': 0.0018443997,
     'ARs': 0.0,
     'box_loss': 0.013682727,
     'cls_loss': 0.5865962,
     'model_loss': 1.2707326,
     'steps_per_second': 9.244308324486969,
     'total_loss': 2.1203787,
     'validation_loss': 2.1203787}
train | step:    500 | training until step 600...
train | step:    600 | steps/sec:    2.5 | output: 
    {'box_loss': 0.014248698,
     'cls_loss': 0.5383383,
     'learning_rate': 0.034549143,
     'model_loss': 1.2507733,
     'total_loss': 2.0965848,
     'training_loss': 2.0965848}
saved checkpoint to ./trained_model/ckpt-600.
 eval | step:    600 | running 100 steps of evaluation...
creating index...
index created!
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=1.77s).
Accumulating evaluation results...
DONE (t=0.14s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.001
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.001
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.004
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.001
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.009
 eval | step:    600 | steps/sec:   10.8 | eval time:    9.3 sec | output: 
    {'AP': 6.514979e-05,
     'AP50': 0.00029717758,
     'AP75': 2.8865861e-05,
     'APl': 0.00011936904,
     'APm': 0.0007563307,
     'APs': 0.0,
     'ARl': 0.0088262055,
     'ARm': 0.0010582011,
     'ARmax1': 0.00020120724,
     'ARmax10': 0.0007042253,
     'ARmax100': 0.0038564722,
     'ARs': 0.0,
     'box_loss': 0.013620645,
     'cls_loss': 0.57616067,
     'model_loss': 1.2571929,
     'steps_per_second': 10.753361983129007,
     'total_loss': 2.0995522,
     'validation_loss': 2.0995522}
train | step:    600 | training until step 700...
train | step:    700 | steps/sec:    2.6 | output: 
    {'box_loss': 0.014085476,
     'cls_loss': 0.50065005,
     'learning_rate': 0.020610739,
     'model_loss': 1.204924,
     'total_loss': 2.0447357,
     'training_loss': 2.0447357}
saved checkpoint to ./trained_model/ckpt-700.
 eval | step:    700 | running 100 steps of evaluation...
creating index...
index created!
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=1.30s).
Accumulating evaluation results...
DONE (t=0.25s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.001
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.002
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.004
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.009
 eval | step:    700 | steps/sec:   11.8 | eval time:    8.5 sec | output: 
    {'AP': 4.3932916e-05,
     'AP50': 0.00016571624,
     'AP75': 4.9698775e-07,
     'APl': 8.430273e-05,
     'APm': 0.0007072136,
     'APs': 0.0,
     'ARl': 0.009118195,
     'ARm': 0.00041152263,
     'ARmax1': 0.0,
     'ARmax10': 0.0017764652,
     'ARmax100': 0.0038804873,
     'ARs': 0.0,
     'box_loss': 0.013559979,
     'cls_loss': 0.5797829,
     'model_loss': 1.2577821,
     'steps_per_second': 11.753483041042912,
     'total_loss': 2.0953832,
     'validation_loss': 2.0953832}
train | step:    700 | training until step 800...
train | step:    800 | steps/sec:    2.6 | output: 
    {'box_loss': 0.014484431,
     'cls_loss': 0.4944819,
     'learning_rate': 0.009549147,
     'model_loss': 1.2187034,
     'total_loss': 2.0548458,
     'training_loss': 2.0548458}
saved checkpoint to ./trained_model/ckpt-800.
 eval | step:    800 | running 100 steps of evaluation...
creating index...
index created!
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=1.07s).
Accumulating evaluation results...
DONE (t=0.13s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.001
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.002
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.004
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.002
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.006
 eval | step:    800 | steps/sec:   12.4 | eval time:    8.1 sec | output: 
    {'AP': 6.4558626e-05,
     'AP50': 0.00030914438,
     'AP75': 2.7200522e-05,
     'APl': 8.1441285e-05,
     'APm': 0.0005420082,
     'APs': 0.0,
     'ARl': 0.006408811,
     'ARm': 0.0019656257,
     'ARmax1': 3.353454e-05,
     'ARmax10': 0.001558599,
     'ARmax100': 0.0036042058,
     'ARs': 0.0,
     'box_loss': 0.013548499,
     'cls_loss': 0.56659806,
     'model_loss': 1.2440231,
     'steps_per_second': 12.364586417315936,
     'total_loss': 2.0789967,
     'validation_loss': 2.0789967}
train | step:    800 | training until step 900...
train | step:    900 | steps/sec:    2.6 | output: 
    {'box_loss': 0.013828442,
     'cls_loss': 0.48287526,
     'learning_rate': 0.002447176,
     'model_loss': 1.1742971,
     'total_loss': 2.008646,
     'training_loss': 2.008646}
saved checkpoint to ./trained_model/ckpt-900.
 eval | step:    900 | running 100 steps of evaluation...
creating index...
index created!
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=1.08s).
Accumulating evaluation results...
DONE (t=0.13s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.002
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.005
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.002
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.009
 eval | step:    900 | steps/sec:   11.2 | eval time:    8.9 sec | output: 
    {'AP': 9.020708e-05,
     'AP50': 0.00034493182,
     'AP75': 0.0,
     'APl': 9.0322486e-05,
     'APm': 0.00038884624,
     'APs': 0.0,
     'ARl': 0.008925346,
     'ARm': 0.0019278807,
     'ARmax1': 0.00046926722,
     'ARmax10': 0.0017447204,
     'ARmax100': 0.004528087,
     'ARs': 0.0,
     'box_loss': 0.013497253,
     'cls_loss': 0.5624246,
     'model_loss': 1.2372874,
     'steps_per_second': 11.173742404390573,
     'total_loss': 2.0712056,
     'validation_loss': 2.0712056}
train | step:    900 | training until step 1000...
train | step:   1000 | steps/sec:    2.6 | output: 
    {'box_loss': 0.014066147,
     'cls_loss': 0.50184095,
     'learning_rate': 0.0,
     'model_loss': 1.2051482,
     'total_loss': 2.0389369,
     'training_loss': 2.0389369}
saved checkpoint to ./trained_model/ckpt-1000.
 eval | step:   1000 | running 100 steps of evaluation...
creating index...
index created!
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=1.06s).
Accumulating evaluation results...
DONE (t=0.14s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.001
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.001
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.004
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.007
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.004
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.014
 eval | step:   1000 | steps/sec:   11.9 | eval time:    8.4 sec | output: 
    {'AP': 0.00018147039,
     'AP50': 0.0006535421,
     'AP75': 3.449822e-05,
     'APl': 0.00026415233,
     'APm': 0.00043287792,
     'APs': 0.0,
     'ARl': 0.0138177965,
     'ARm': 0.004438832,
     'ARmax1': 0.0014392462,
     'ARmax10': 0.0042889784,
     'ARmax100': 0.007483876,
     'ARs': 0.0,
     'box_loss': 0.013497738,
     'cls_loss': 0.5564805,
     'model_loss': 1.2313673,
     'steps_per_second': 11.86848610520025,
     'total_loss': 2.0651052,
     'validation_loss': 2.0651052}
 eval | step:   1000 | running 100 steps of evaluation...
creating index...
index created!
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=1.06s).
Accumulating evaluation results...
DONE (t=0.15s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.001
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.001
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.004
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.007
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.004
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.014
 eval | step:   1000 | steps/sec:   12.5 | eval time:    8.0 sec | output: 
    {'AP': 0.00018147039,
     'AP50': 0.0006535421,
     'AP75': 3.449822e-05,
     'APl': 0.00026415233,
     'APm': 0.00043287792,
     'APs': 0.0,
     'ARl': 0.0138177965,
     'ARm': 0.004438832,
     'ARmax1': 0.0014392462,
     'ARmax10': 0.0042889784,
     'ARmax100': 0.007483876,
     'ARs': 0.0,
     'box_loss': 0.013497738,
     'cls_loss': 0.5564805,
     'model_loss': 1.2313673,
     'steps_per_second': 12.523956227496434,
     'total_loss': 2.0651052,
     'validation_loss': 2.0651052}

Load logs in tensorboard.¶

In [ ]:
# Inspect the training/eval curves written by the trainer's summary writers.
%load_ext tensorboard
%tensorboard --logdir './trained_model/'

Saving and exporting the trained model.¶

The keras.Model object returned by train_lib.run_experiment expects the data to be normalized by the dataset loader using the same mean and variance statistics in preprocess_ops.normalize_image(image, offset=MEAN_RGB, scale=STDDEV_RGB). This export function handles those details, so you can pass tf.uint8 images and get the correct results.

In [ ]:
# NOTE(review): this cell prints export_dir BEFORE reassigning it, so on a
# fresh kernel the print raises NameError unless export_dir was set in an
# earlier (unseen) cell. The recorded output './exported_model/' also does
# not match the Drive path assigned below — confirm which export target the
# following export cell is meant to use.
print(export_dir)
export_dir = '/content/gdrive/MyDrive/models/aquarium'
./exported_model/
In [ ]:
# Export the latest checkpoint as a SavedModel that accepts raw uint8 image
# tensors; the serving graph bakes in the resize + normalization preprocessing
# so callers do not have to replicate it.
export_saved_model_lib.export_inference_graph(
    input_type='image_tensor',
    batch_size=1,
    input_image_size=[HEIGHT, WIDTH],
    params=exp_config,
    checkpoint_path=tf.train.latest_checkpoint(model_dir),
    export_dir=export_dir)
WARNING:tensorflow:Skipping full serialization of TF-Keras layer <official.vision.modeling.retinanet_model.RetinaNetModel object at 0x7f19dd5584c0>, because it is not built.
WARNING:tensorflow:Skipping full serialization of TF-Keras layer <official.vision.modeling.layers.detection_generator.MultilevelDetectionGenerator object at 0x7f19537ab340>, because it is not built.

Inference from trained model¶

In [ ]:
def load_image_into_numpy_array(path):
  """Load an image from a file or URL into a batched uint8 numpy array.

  Puts image into numpy array to feed into tensorflow graph. The image
  is forced to 3-channel RGB, so grayscale, palette, and RGBA inputs
  no longer break the fixed-shape conversion (the original
  `np.array(image.getdata()).reshape(...)` crashed on anything that was
  not already exactly 3-channel, and was slow besides).

  Args:
    path: the file path (or an http(s) URL) to the image.

  Returns:
    uint8 numpy array with shape (1, img_height, img_width, 3) — a
    batch of one RGB image. (The previous docstring claimed a 3-D
    shape, but the function has always returned a leading batch dim.)
  """
  if path.startswith('http'):
    # Remote image: fetch the raw bytes over HTTP(S).
    image_data = urlopen(path).read()
  else:
    # Local path; tf.io.gfile is used so remote filesystems that
    # TensorFlow supports (e.g. gs://) also work.
    image_data = tf.io.gfile.GFile(path, 'rb').read()

  # convert('RGB') normalizes mode (drops alpha, expands palette/gray).
  image = Image.open(BytesIO(image_data)).convert('RGB')
  # np.asarray on a PIL image is vectorized — far faster than
  # getdata() + reshape — and already yields (h, w, 3); just add the
  # leading batch dimension.
  return np.asarray(image, dtype=np.uint8)[np.newaxis, ...]



def build_inputs_for_object_detection(image, input_image_size):
  """Preprocess one image into a serving-ready detection model input.

  Resizes and pads the image to `input_image_size` deterministically:
  aug_scale_min == aug_scale_max == 1.0 disables random scale jitter,
  so the same preprocessing is applied every run.

  Args:
    image: the decoded image tensor to preprocess.
    input_image_size: target (height, width) for both resize and pad.

  Returns:
    The resized-and-padded image tensor.
  """
  resized_image, _unused_image_info = resize_and_crop_image(
      image,
      input_image_size,
      padded_size=input_image_size,
      aug_scale_min=1.0,
      aug_scale_max=1.0,
  )
  return resized_image

Visualize test data.¶

In [ ]:
num_of_examples = 3

# Take a fixed number of examples from the test shard for visualization.
test_ds = tf.data.TFRecordDataset(
    './aquarium_tfrecords/test-00000-of-00001.tfrecord').take(
        num_of_examples)
# Fix: show_batch was accidentally called twice, rendering the same
# figure twice (see the duplicated image outputs below this cell).
show_batch(test_ds, num_of_examples)
No description has been provided for this image
No description has been provided for this image

Importing SavedModel.¶

In [ ]:
# Load the exported SavedModel and bind its default serving signature.
# `model_fn` maps a uint8 image batch to detection outputs
# (detection_boxes / detection_classes / detection_scores — see the
# inference loop below).
imported = tf.saved_model.load(export_dir)
model_fn = imported.signatures['serving_default']

Visualize predictions.¶

In [ ]:
input_image_size = (HEIGHT, WIDTH)
plt.figure(figsize=(20, 20))
min_score_thresh = 0.30  # Lower this threshold to see more (less confident) boxes.

for i, serialized_example in enumerate(test_ds):
  # One subplot per example. Use num_of_examples (defined with test_ds)
  # instead of a hard-coded 3, so the layout stays correct if the
  # number of visualized examples changes.
  plt.subplot(1, num_of_examples, i + 1)
  decoded_tensors = tf_ex_decoder.decode(serialized_example)
  # Deterministic resize/pad to the model's input size, then add a
  # batch dim and cast to uint8 (the exported signature takes uint8).
  image = build_inputs_for_object_detection(decoded_tensors['image'],
                                            input_image_size)
  image = tf.expand_dims(image, axis=0)
  image = tf.cast(image, dtype=tf.uint8)
  image_np = image[0].numpy()
  result = model_fn(image)
  # Draws boxes/labels/scores in place on image_np.
  # use_normalized_coordinates=False: box coordinates are given in
  # absolute pixels, not [0, 1] fractions.
  visualization_utils.visualize_boxes_and_labels_on_image_array(
      image_np,
      result['detection_boxes'][0].numpy(),
      result['detection_classes'][0].numpy().astype(int),
      result['detection_scores'][0].numpy(),
      category_index=category_index,
      use_normalized_coordinates=False,
      max_boxes_to_draw=200,
      min_score_thresh=min_score_thresh,
      agnostic_mode=False,
      instance_masks=None,
      line_thickness=4)
  plt.imshow(image_np)
  plt.axis('off')

plt.show()